import cv2
import gym
#import highway_env
#from Python.highway-env-master import setup.py
import sys

import highway_env
from stable_baselines3 import DDPG
import torch
import pprint

# Video frame size as (width, height); the same 640x280 values are passed to
# env.configure as screen_width / screen_height.
frameSize = (640, 280)

# AVI writer using the 'DIVX' codec at 4 frames per second.
out = cv2.VideoWriter(
    'video-DDPG1.avi',
    cv2.VideoWriter_fourcc(*'DIVX'),
    4,
    frameSize,
)


# Select which environment to use here.
# Remember to also change the paths passed to model.save and model.load.

# Alternative environment: gym.make("racetrack-v1")
env = gym.make("intersection-v1")

# Rendering resolution; kept equal to the video writer's frame size (640x280).
# Optional extra key, currently disabled: "normalize_reward": False
render_config = {
    "screen_width": 640,
    "screen_height": 280,
}
env.configure(render_config)

# Reset once after configuring, then dump the resulting configuration.
env.reset()
pprint.pprint(env.config)

# DDPG hyperparameters, gathered in one place so they are easy to tune.
# Arguments deliberately left at their library defaults: tau (between 0 and 1),
# action_noise, replay_buffer_class, replay_buffer_kwargs, seed, and device
# (cpu, cuda, ...; with "auto" the code runs on the GPU if possible).
ddpg_hyperparams = dict(
    policy_kwargs=dict(net_arch=[256, 256]),
    learning_rate=5e-3,  # previously 5e-4
    buffer_size=15000,
    learning_starts=200,
    batch_size=32,
    gamma=0.8,
    train_freq=1,
    gradient_steps=1,
    # Memory-efficient replay buffer variant, at the cost of more complexity.
    optimize_memory_usage=False,
    verbose=1,
    tensorboard_log="racetrack_ddpg/",
)

model = DDPG('MlpPolicy', env, **ddpg_hyperparams)
# Train a new model. Comment out the learn/save calls below to skip training
# and only evaluate a previously saved model.
total_timesteps = 5_000
model.learn(total_timesteps=total_timesteps)

# Change the path passed to model.save when switching environment/algorithm.
model.save("highway_dqn/model")

# print()
# print("Done Learning!!")
# print()





########## Load and test saved model ##############

# Change the path passed to model.load so it matches the one used in model.save.
model = DDPG.load("highway_dqn/model")

try:
    # Roll out 40 evaluation episodes using the deterministic policy and
    # record every step to the video writer.
    for _ in range(40):
        done = truncated = False
        obs, info = env.reset()
        while not (done or truncated):
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, truncated, info = env.step(action)

            env.render()
            cur_frame = env.render(mode="rgb_array")
            # Frames requested as "rgb_array" are RGB-ordered, while
            # cv2.VideoWriter expects BGR; convert so the recorded colours
            # are not swapped.
            out.write(cv2.cvtColor(cur_frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalize the video file, even if a rollout raises, so the
    # frames written so far remain playable.
    out.release()

print('DONE')


#print(env_reward())

#NOTE
#`rewards` gives the individual reward terms, one per category;
#`reward` combines the values from `rewards` into a single value.
#`reward` is computed from the config and `rewards`.

